from pydantic import BaseModel, conlist
from typing import List, Tuple, Dict


class BaseJsonlModel(BaseModel):
    """
    Pydantic base model holding the fields shared by the task-level JSONL records.

    It is subclassed in this module by `SampleJsonlModel`,
    `SampleBlocksJsonlModel` and `PlanJsonlModel`. Every field is declared
    without a default value, so all of them are required.

    Fields:
    - annotation_id: Unique identifier for each task.
    - action_uid: Unique identifier for each action within the task.
    - image: The filename of the screenshot related to the action.
    - task: The description of the task being performed.
    - website: The name of the website where the task is performed.
    - domain: The domain category of the task (e.g., Info, Service).
    - subdomain: A more specific subdomain category (e.g., Housing, Shipping).
    - operation: The operation to be performed, such as CLICK, TYPE, SELECT.
      Typed as a plain string; the set of allowed values is not enforced here.
    - value: Additional value associated with the operation, such as input text.
    - bbox: List of bounding boxes for the target elements, each a tuple of
      four floats.
    - step: The current step number in the task.
    - total_steps: The total number of steps in the task.
    - split: The dataset split (e.g., domain, website, task).
    """
    annotation_id: str
    action_uid: str
    image: str
    task: str
    website: str
    domain: str
    subdomain: str
    operation: str
    value: str
    # NOTE(review): the meaning/order of the four coordinates (e.g. corner
    # points vs. origin + size) is not specified in this file -- confirm
    # against the code that produces these records.
    bbox: List[Tuple[float, float, float, float]]
    step: int
    total_steps: int
    split: str


class SampleJsonlModel(BaseJsonlModel):
    """
    Schema for one entry of the `sample_jsonl` file.

    Inherits every field of `BaseJsonlModel` and adds one required field.

    Additional fields:
    - previous_actions: A list of previous actions (as strings) taken before
      the current step.
    """
    previous_actions: List[str]


class SampleBlocksJsonlModel(BaseJsonlModel):
    """
    Schema for one entry of the `sample_blocks_jsonl` file.

    Inherits every field of `BaseJsonlModel`. It derives from the base model
    directly rather than from `SampleJsonlModel`, so `previous_actions` is
    declared again here. All additional fields are required.

    Additional fields:
    - previous_actions: A list of previous actions (as strings) taken before
      the current step.
    - blocks_path: Path to the folder containing image blocks.
    - target_blocks: A dictionary that maps block IDs (string keys) to the
      list of bounding boxes of the target elements within that block; each
      bounding box is a tuple of four floats.
    """
    previous_actions: List[str]
    blocks_path: str
    target_blocks: Dict[str, List[Tuple[float, float, float, float]]]


class PlanJsonlModel(BaseJsonlModel):
    """
    Schema for one entry of the `plan_jsonl` file.

    Inherits every field of `BaseJsonlModel` and adds the block information
    together with the GPT planning output. All additional fields are required.

    Additional fields:
    - blocks_path: Path to the folder containing image blocks.
    - target_blocks: A dictionary that maps block IDs (string keys) to the
      list of bounding boxes of the target elements within that block; each
      bounding box is a tuple of four floats.
    - ans_block: The block that GPT finally selects, stored as an integer.
    - gpt_action: The GPT-predicted action to take.
    - gpt_value: The value associated with the GPT-predicted action (if
      applicable). The field is a required `str`, so it cannot be omitted.
    - description: The element description from the GPT response.
    - response: The full response generated by GPT.
    """
    blocks_path: str
    target_blocks: Dict[str, List[Tuple[float, float, float, float]]]
    # NOTE(review): target_blocks is keyed by str while ans_block is an int --
    # confirm how callers relate the selected block to a target_blocks key.
    ans_block: int
    gpt_action: str
    gpt_value: str
    description: str
    response: str

class QueryJsonlModel(BaseModel):
    """
    Schema for one entry of the `query_jsonl` file.

    This is a standalone pydantic model: it does not inherit from
    `BaseJsonlModel`. All fields are required.

    Fields:
    - id: A unique identifier for the query (usually derived from annotation_id and action_uid).
    - image: The path to the image file containing the element to query.
    - bbox: Bounding box coordinates of the target element, stored as a list
      of four-float tuples (so more than one box can be carried).
    - description: A description of the element to interact with.
    - scale: Scale factor used to adjust coordinates for better accuracy.
    """
    id: str
    image: str
    bbox: List[Tuple[float, float, float, float]]
    description: str
    scale: float


class AnsJsonlModel(BaseModel):
    """
    Schema for one entry of the `ans_jsonl` file.

    This is a standalone pydantic model whose fields repeat those of
    `QueryJsonlModel` and add `output`. All fields are required.

    Fields:
    - id: A unique identifier for the answer (usually derived from annotation_id and action_uid).
    - image: The path to the image file related to the answer.
    - bbox: Bounding box coordinates for the target element, stored as a list
      of four-float tuples.
    - description: Description of the target element as provided in the query.
    - scale: Scale factor used to adjust coordinates.
    - output: The predicted output coordinates (e.g., "(x, y)") from the model.
      Kept as a raw string; it is not parsed into numbers by this model.
    """
    id: str
    image: str
    bbox: List[Tuple[float, float, float, float]]
    description: str
    scale: float
    output: str
